#Packages applied

library("twitteR")
## Warning: package 'twitteR' was built under R version 4.2.2
library(tm)
## Warning: package 'tm' was built under R version 4.2.2
## Loading required package: NLP
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:twitteR':
## 
##     id, location
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("plotly")
## Warning: package 'plotly' was built under R version 4.2.2
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
## 
##     annotate
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(RColorBrewer)
library(tidytext)
## Warning: package 'tidytext' was built under R version 4.2.2
library(stringr)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.2
library(rtweet)
## Warning: package 'rtweet' was built under R version 4.2.2
## 
## Attaching package: 'rtweet'
## The following object is masked from 'package:twitteR':
## 
##     lookup_statuses
library(corpus)
## Warning: package 'corpus' was built under R version 4.2.2
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 4.2.2
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.2.2
library(syuzhet)
## Warning: package 'syuzhet' was built under R version 4.2.2
## 
## Attaching package: 'syuzhet'
## The following object is masked from 'package:rtweet':
## 
##     get_tokens

## 1. Extract tweets from Twitter using your developer's credentials.
## SECURITY NOTE(review): the original script hard-coded the consumer key,
## consumer secret, access token and access secret in plain text. Credentials
## committed to a file are compromised and must be revoked/rotated. Read them
## from environment variables instead (e.g. set them in ~/.Renviron).

CONSUMER_SECRET <- Sys.getenv("TWITTER_CONSUMER_SECRET")
CONSUMER_KEY <- Sys.getenv("TWITTER_CONSUMER_KEY")
ACCESS_SECRET <- Sys.getenv("TWITTER_ACCESS_SECRET")
ACCESS_TOKEN <- Sys.getenv("TWITTER_ACCESS_TOKEN")

# Authenticate against the Twitter API (twitteR direct authentication)

setup_twitter_oauth(consumer_key = CONSUMER_KEY,
                    consumer_secret = CONSUMER_SECRET,
                    access_token = ACCESS_TOKEN,
                    access_secret = ACCESS_SECRET)

# Get up to 10000 English tweets for the hashtag, excluding retweets

TrendTweets <- searchTwitter("#wednesdaynetflix -filter:retweets",
                             n = 10000,
                             lang = "en",
                             since = "2022-11-23",
                             until = "2022-11-30",
                             retryOnRateLimit = 120)
TrendingTweetsDF <- twListToDF(TrendTweets)
save(TrendingTweetsDF, file = "TrendingTweetDF.Rdata")

# OR: skip the live extraction above and reuse the previously saved
# data file (next section). The original bare word `Or` was not valid R
# and would raise "object 'Or' not found" when the script is sourced.

# Existing data file (set the location folder).
# NOTE(review): setwd() with an absolute path makes the script machine-
# specific; prefer project-relative paths. Kept for compatibility with the
# original workflow.

setwd("C:/CS101_DATA_SCIENCE/Nalaza_Repo/Individual Project/Individual Project 1")

# Restore the saved data frame `TrendingTweetsDF` from disk

load(file = "TrendingTweetDF.Rdata")

# The rest of the script refers to `tweetsDF`, which was never defined in
# the original file; alias it here so the downstream plots and summaries run.
tweetsDF <- TrendingTweetsDF

# Count missing values per column (in the saved sample, longitude/latitude
# were entirely NA and replyTo* fields were mostly NA)
sapply(TrendingTweetsDF, function(x) sum(is.na(x)))

# Plot tweet volume over time (histogram of creation timestamps).
# Review fixes:
#  * `fill = tweetsDF` was passed to ggplot() as a bogus argument — a whole
#    data frame is not an aesthetic value — so it was removed.
#  * `..count..` is deprecated in ggplot2; use after_stat(count) instead.

ggplot(data = tweetsDF, aes(x = created)) +
  geom_histogram(aes(fill = after_stat(count))) +
  theme(legend.position = "right",
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Time") + ylab("Number of tweets") +
  scale_fill_gradient(low = "black", high = "midnightblue") +
  ggtitle("Trendtweets #wednesdaynetflix")

# Summary of the created-at range.
# Review: `group_by(1)` only introduced a constant dummy grouping column;
# a plain summarise() returns the same max/min without the extra `1` column.

tweetsDF %>%
  summarise(max = max(created), min = min(created))

# Round each timestamp to the nearest hour so tweets can be counted per hour
# (%<>% from magrittr assigns the result back to tweetsDF)

tweetsDF %<>%
  mutate(Created_At_Round = created %>%
           round(units = 'hours') %>%
           as.POSIXct())

# Earliest and latest tweet timestamps in the sample
tweetsDF %>% pull(created) %>% min()
tweetsDF %>% pull(created) %>% max()

# Interactive line chart of the number of tweets created per hour

hourly_counts <- dplyr::count(tweetsDF, Created_At_Round)

plt <- ggplot(hourly_counts, aes(x = Created_At_Round, y = n)) +
  geom_line() +
  theme_light() +
  xlab(label = 'Date') +
  ylab(label = NULL) +
  ggtitle(label = "Number of Tweets per Hour")

ggplotly(plt)

# Frequency of tweets per hour via rtweet::ts_plot(), rendered interactively.
# Review fix: the plot title misspelled the hashtag as "wendesdaynetflix".

Frequency <- ts_plot(tweetsDF, "hours") +
  labs(x = NULL, y = NULL,
       title = "Frequency of tweets with a #wednesdaynetflix hashtag",
       subtitle = paste0(format(min(tweetsDF$created), "%d %B %Y"), " to ",
                         format(max(tweetsDF$created), "%d %B %Y")),
       caption = "Data collected from Twitter's REST API via twitteR") +
  theme_minimal()

ggplotly(Frequency)

# Plot a graph based on the type of device — found in statusSource — that the
# user used. Include the legends.
#
# Review fix: the original plotted tweetsDF$tweetSource, but that column was
# never created anywhere in this file (the "encode source of tweets" step
# announced by the comment was missing). Derive it here from the HTML anchor
# text in statusSource, lumping uncommon clients into 'others'.
# NOTE(review): top-5 cutoff is an assumption — adjust to taste.

tweetsDF$tweetSource <- gsub(".*>(.*)</a>.*", "\\1", tweetsDF$statusSource)
common_sources <- names(sort(table(tweetsDF$tweetSource), decreasing = TRUE))[1:5]
tweetsDF$tweetSource[!(tweetsDF$tweetSource %in% common_sources)] <- 'others'

# Bar chart of tweets by source, with a legend on the right

ggplot(tweetsDF[tweetsDF$tweetSource != 'others', ],
       aes(tweetSource, fill = tweetSource)) +
  geom_bar() +
  theme(legend.position = "right",
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylab("Number of tweets") +
  ggtitle("Tweets by Source")

# Create a word cloud from screenName.
# NOTE(review): this tm Corpus is not used anywhere in the visible part of
# the script; kept in case code outside this view consumes it — otherwise
# it can be deleted.

namesCorpus <- Corpus(VectorSource(tweetsDF$screenName))

# Frequency table for the word cloud: one row per screen name with its
# tweet count, most active users first

TweetCloud <- tweetsDF %>%
  select(screenName) %>%
  group_by(screenName) %>%
  summarize(count = n()) %>%
  arrange(desc(count))

# Render the interactive word cloud with wordcloud2().
# Review fix: shape names are lowercase in the wordcloud2 API
# ('circle', 'cardioid', 'diamond', ...); 'Diamond' is not a documented value.

wordcloud2(TweetCloud,
           size = 2,
           color = 'random-dark',
           shape = 'diamond')